Taken from Steven Turner's Template for analysis with DESeq2 and slightly modified for my usecase/jupyter format.
In [ ]:
source("https://bioconductor.org/biocLite.R")
biocLite("DESeq2")
In [ ]:
library(DESeq2)
In [ ]:
install.packages("calibrate")
In [ ]:
install.packages("gplots")
In [ ]:
countdata = read.csv('data.csv', header=TRUE, row.names=1)
countdata = as.matrix(countdata)
head(countdata)
Experimental conditions and number of replicates
In [ ]:
(condition = factor(c(rep("control",3), rep("treatment",3))))
In [ ]:
(coldata = data.frame(row.names=colnames(countdata), condition))
In [ ]:
dds = DESeqDataSetFromMatrix(countData=countdata, colData=coldata, design=~condition)
dds
In [ ]:
dds = DESeq(dds)
In [ ]:
plotDispEsts(dds, main="Dispersion plot")
In [ ]:
rld = rlogTransformation(dds)
In [ ]:
head(assay(rld))
In [ ]:
hist(assay(rld))
In [ ]:
library(RColorBrewer)
In [ ]:
(mycols = brewer.pal(8, "Dark2")[1:length(unique(condition))])
In [ ]:
sampleDists = as.matrix(dist(t(assay(rld))))
In [ ]:
library(gplots)
In [ ]:
heatmap(as.matrix(sampleDists), key=F, trace="none",
col=colorpanel(100, "black", "white"),
ColSideColors=mycols[condition], RowSideColor=mycols[condition],
margin=c(10,10), main="Sample Distance Matrix")
In [ ]:
rld_pca <- function (rld, intgroup = "condition", ntop = 500, colors=NULL, legendpos="bottomleft", main="PCA Biplot", textcx=1, ...) {
require(genefilter)
require(calibrate)
require(RColorBrewer)
rv = rowVars(assay(rld))
select = order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))]
pca = prcomp(t(assay(rld)[select, ]))
fac = factor(apply(as.data.frame(colData(rld)[, intgroup, drop = FALSE]), 1, paste, collapse = " : "))
if (is.null(colors)) {
if (nlevels(fac) >= 3) {
colors = brewer.pal(nlevels(fac), "Paired")
} else {
colors = c("black", "red")
}
}
pc1var <- round(summary(pca)$importance[2,1]*100, digits=1)
pc2var <- round(summary(pca)$importance[2,2]*100, digits=1)
pc1lab <- paste0("PC1 (",as.character(pc1var),"%)")
pc2lab <- paste0("PC1 (",as.character(pc2var),"%)")
plot(PC2~PC1, data=as.data.frame(pca$x), bg=colors[fac], pch=21, xlab=pc1lab, ylab=pc2lab, main=main, ...)
with(as.data.frame(pca$x), textxy(PC1, PC2, labs=rownames(as.data.frame(pca$x)), cex=textcx))
legend(legendpos, legend=levels(fac), col=colors, pch=20)
# rldyplot(PC2 ~ PC1, groups = fac, data = as.data.frame(pca$rld),
# pch = 16, cerld = 2, aspect = "iso", col = colours, main = draw.key(key = list(rect = list(col = colours),
# terldt = list(levels(fac)), rep = FALSE)))
}
In [ ]:
rld_pca(rld, colors=mycols, intgroup="condition")
In [ ]:
res = results(dds)
In [ ]:
table(res$padj<0.05)
In [ ]:
res = res[order(res$padj),]
resdata = merge(as.data.frame(res), as.data.frame(counts(dds, normalized=TRUE)), by="row.names", sort=FALSE)
names(resdata)[1] = "Gene"
head(resdata)
In [ ]:
write.csv(resdata, file="../results/d37bcm.deseq.csv")
In [ ]:
hist(res$pvalue, breaks=50, col="grey")
In [ ]:
maplot <- function (res, thresh=0.05, labelsig=TRUE, textcx=1, ...) {
with(res, plot(baseMean, log2FoldChange, pch=20, cex=.5, log="x", ...))
with(subset(res, padj<thresh), points(baseMean, log2FoldChange, col="red", pch=20, cex=1.5))
if (labelsig) {
require(calibrate)
with(subset(res, padj<thresh), textxy(baseMean, log2FoldChange, labs=Gene, cex=textcx, col=2))
}
}
In [ ]:
maplot(resdata, main="MA Plot")
In [ ]:
volcanoplot <- function (res, lfcthresh=2, sigthresh=0.05, main="Volcano Plot", legendpos="bottomright", labelsig=TRUE, textcx=1, ...) {
with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main=main, ...))
with(subset(res, padj<sigthresh ), points(log2FoldChange, -log10(pvalue), pch=20, col="red", ...))
with(subset(res, abs(log2FoldChange)>lfcthresh), points(log2FoldChange, -log10(pvalue), pch=20, col="orange", ...))
with(subset(res, padj<sigthresh & abs(log2FoldChange)>lfcthresh), points(log2FoldChange, -log10(pvalue), pch=20, col="green", ...))
if (labelsig) {
require(calibrate)
with(subset(res, padj<sigthresh & abs(log2FoldChange)>lfcthresh), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=textcx, ...))
}
legend(legendpos, xjust=1, yjust=1, legend=c(paste("FDR<",sigthresh,sep=""), paste("|LogFC|>",lfcthresh,sep=""), "both"), pch=20, col=c("red","orange","green"))
}
In [ ]:
volcanoplot(resdata, lfcthresh=1, sigthresh=0.05, textcx=.8)
In [ ]: